OHI-Northeast | OHI Science | Citation policy
knitr::opts_chunk$set(fig.width = 6, fig.height = 4, fig.path = 'figs/', message = FALSE, warning = FALSE)
source('~/github/ne-prep/src/R/common.R') ### an OHINE specific version of common.R
#install.packages("viridis")
library(viridis)
#install.packages("plotly")
library(plotly)
library(readxl)This script creates the Iconic Species status data layer for use in the Sense of Place: Iconic Species subgoal. A list of 33 iconic species was created following input from multiple folks in the region. The Iconic Species status layer calculates the conservation status score (0 = extinct, 1 = least concern) for each of the iconic species.
We use NatureServe and IUCN conservation status (aka extinction risk) information.
Load the list of our iconic species. This list was created manually through consultation with folks in the region about iconic Northeast species.
iconic_list <- read_csv("data/iconic_species_list.csv") %>%
mutate(common = tolower(common),
scientific = tolower(scientific))Species range (i.e. where the species is found) and conservation status information comes from a layer in the Biodiversity - Species subgoal. The spp_status_scores.csv contains information about each species in our region, their conservation status and the associated score between 0 and 1 (where 0 is low extinction risk and 1 is extinct). spp_rgns.csv contains info on where each species is found within the Northeast. Both of these files are created in the prep/bio/spp folder and these two .csv’s live in the ne-scores repository.
species_status <- read_csv("~/github/ne-scores/region/layers/spp_status_scores.csv") %>%
select(-X1) %>%
mutate(common = ifelse(common == "white shark", "great white shark", common)) %>%
filter(common %in% iconic_list$common)
unique(species_status$common)## [1] "alewife" "american lobster"
## [3] "american shad" "arctic tern"
## [5] "atlantic bluefin tuna" "atlantic cod"
## [7] "atlantic herring" "atlantic puffin"
## [9] "atlantic sturgeon" "bald eagle"
## [11] "bottlenose dolphin" "common tern"
## [13] "fin whale" "haddock"
## [15] "horseshoe crab" "humpback whale"
## [17] "least tern" "minke whale"
## [19] "north atlantic right whale" "osprey"
## [21] "roseate tern" "sandbar shark"
## [23] "sea scallop" "sperm whale"
## [25] "striped bass" "great white shark"
#for use later when expanding the list for where species are even if their conservation status isn't listed in those areas
species_rgns <- read_csv("~/github/ne-scores/region/layers/spp_rgns.csv") %>% select(-year)## [1] "american oyster" "blue crab" "bay scallop"
## [4] "atlantic surfclam" "soft shell clam" "northern quahog"
## [7] "atlantic salmon" "piping plover"
Filter dataset just for iconic species
iconic_species_status <- species_status %>%
mutate(score = 1-score) %>% #the original scores from the SPP status are 0 (good) to 1 (bad/extinct). Here I reverse this so closer to 1 = better
filter(!is.na(status)) %>%
left_join(rgn_data) %>%
select(-state, -area_km2) %>%
rename(state = state_abv) Did we get all 33 species?
## [1] "alewife" "american lobster"
## [3] "american shad" "arctic tern"
## [5] "atlantic bluefin tuna" "atlantic cod"
## [7] "atlantic herring" "atlantic puffin"
## [9] "atlantic sturgeon" "bald eagle"
## [11] "bottlenose dolphin" "common tern"
## [13] "fin whale" "haddock"
## [15] "horseshoe crab" "humpback whale"
## [17] "least tern" "minke whale"
## [19] "north atlantic right whale" "osprey"
## [21] "roseate tern" "sandbar shark"
## [23] "sperm whale" "striped bass"
## [25] "great white shark"
We only have 25 species in the spp_status_scores data. What are we missing?
## [1] "american oyster" "blue crab" "bay scallop"
## [4] "sea scallop" "atlantic surfclam" "soft shell clam"
## [7] "northern quahog" "atlantic salmon" "piping plover"
Most of the species we are missing are a harvested (commercially fished) species, except for Piping plover. I’m going to see if Nature Serve has this species.
#install.packages("natserv")
library(natserv)
options(NatureServeKey = Sys.getenv("NatureServeKey"))
id <- ns_search(x = "Charadrius melodus")$globalSpeciesUid
id_dat <- ns_data(uid = id)
#state level conservation status
state <- id_dat[[1]]$conservationStatus$natureserve$nationalStatuses$US$subnationalStatuses
#us rank for region 12
pp_us <- id_dat[[1]]$conservationStatus$natureserve$nationalStatuses$US$rank
#create an empty dataframe to cycle through each state
out <- data.frame(state = NA,
rank = NA)
for(j in 1:length(state)){
ST <- state[[j]]$subnationCode
if(ST %in% c("CT", "NY", "MA", "ME", "RI", "NH")){
state_rank <- state[[j]]$rank
df <- data.frame(state = ST, rank = state_rank, stringsAsFactors = F)
}else{
df <- data.frame(state = NA, rank = NA, stringsAsFactors = F)
}
out <- rbind(out, df) %>%
filter(!is.na(state))
}
#add in the US rank
out <- out %>%
add_row(state = "USA", rank = pp_us)
#read in natureserve scores for the status
ns_scores <- read_csv("~/github/ne-prep/prep/bio/spp/data/natserv_status_scores.csv") %>% select(-X1)
pp <- out %>%
left_join(ns_scores, by = c("rank" = "status")) %>%
left_join(rgn_data, by = c("state" = "state_abv")) %>%
mutate(common = "piping plover",
sciname = "Charadrius melodus",
year = 2017,
status_scale = state,
score = 1-score) %>%
select(common, sciname, status = rank, status_scale, rgn_name, rgn_id, score, year) %>%
mutate(rgn_name = ifelse(status_scale == "USA", "Northeast", rgn_name),
rgn_id = ifelse(status_scale == "USA", 12, rgn_id))We will use our stock scores for these species where available. The nmfs_stock_scores.csv dataset was created in the Seafood Provision - Wild-Caught Fisheries subgoal data prep.
stock_scores <- read_csv("~/github/ne-prep/prep/fis/data/nmfs_stock_scores.csv") %>%
select(year, stock, b_bmsy) %>%
mutate(score = ifelse(b_bmsy >=1, 1, b_bmsy)) %>%
separate(stock, into = c("common", "area"), sep = " - ") %>%
filter(tolower(common) %in% missing) %>%
select(common, score, year)
unique(stock_scores$common)## [1] "Atlantic salmon" "Atlantic surfclam" "Sea scallop"
Only three of the species have stock assessments from NMFS that we can use as scores. So looks like we are missing american oyster, blue crab, bay scallop, soft shell clam, and quahog.
We don’t care about how big or small the species range map is, if a species exists within an OHI region we will count it there.
## # A tibble: 11 x 3
## rgn_id common sciname
## <dbl> <chr> <chr>
## 1 12 sea scallop placopecten magellanicus
## 2 1 sea scallop placopecten magellanicus
## 3 2 sea scallop placopecten magellanicus
## 4 3 sea scallop placopecten magellanicus
## 5 4 sea scallop placopecten magellanicus
## 6 6 sea scallop placopecten magellanicus
## 7 7 sea scallop placopecten magellanicus
## 8 8 sea scallop placopecten magellanicus
## 9 9 sea scallop placopecten magellanicus
## 10 10 sea scallop placopecten magellanicus
## 11 11 sea scallop placopecten magellanicus
We only have sea scallop identified to regions within the Northeast. That means we are missing salmon and surfclam. For now let’s assume they are everywhere. Combine rgn_areas with the scores
salm_surf <- stock_scores %>%
filter(common != "Sea scallop") %>%
mutate(rgn_id = 1,
common = tolower(common)) %>%
group_by(common, score) %>%
complete(rgn_id = 1:11, year) %>%
mutate(sciname = case_when(
common == "atlantic salmon" ~ "Salmo salar",
common == "atlantic surfclam" ~ "Spissula solidissima"
))
#join the salmon and surfclam dataset with sea scallops
salm_surf_scallop <- stock_scores %>%
mutate(common = tolower(common)) %>%
filter(common == "sea scallop") %>%
left_join(fish_rgns) %>%
bind_rows(salm_surf) %>%
filter(year == 2017) %>% #the score is the same for these species across all years
left_join(rgn_data) %>%
select(-area_km2, -state) %>%
rename(state = state_abv)Some species are not showing up in certain regions even though they are listed by the state in those areas. This could be due to the fact that species distribution models are not accurate enough to be captured in smaller regions like Connecticut or Long Island Sound. To account for this, we use a dataset provided by Emily Shumchenia that lists species in the portal and whether or not they are listed by state.
emily_data <- read_excel(file.path(dir_anx, "_raw_data/species/KLW_ES_Data_gaps_Northeast_species_list_9.29.16.xlsx"), sheet = "all species", skip = 1) %>%
mutate(low_species = tolower(`Species - common name`))Filter just for iconic species
em_spp <- emily_data %>%
filter(low_species %in% c(iconic_list$common, "atlantic sea scallop", "surf clams", "ocean quahog", "white shark")) %>%
mutate(common = case_when(
low_species == "atlantic sea scallop" ~ "sea scallop",
low_species == "surf clams" ~ "atlantic surfclam",
low_species == "ocean quahog" ~ "northern quahog",
low_species == "white shark" ~ "great white shark",
TRUE ~ as.character(low_species)
))
setdiff(iconic_list$common, em_spp$common)## [1] "american oyster" "blue crab" "horseshoe crab"
## [4] "bay scallop" "bald eagle" "bottlenose dolphin"
## [7] "soft shell clam"
Find states where these species are listed
em_states <- em_spp %>%
select(common, `E, T, SC`) %>%
filter(!is.na(`E, T, SC`),
common != "atlantic salmon") %>% #we want to use the stock assessment data for atlantic salmon
separate(`E, T, SC`, into = c("state", "state2", "state3", "state4", "state5", "state6"), sep = ", |;") %>%
gather(key = "label", value = "state", -common) %>%
select(-label) %>%
filter(!is.na(state)) %>%
separate(state, into = c("state", "status"), sep = " ") %>%
mutate(state = ifelse(state == "", status, state),
status = ifelse(status==state, NA, status),
status_score = case_when(
status == "(E)" ~ 0.8,
status == "(T)" ~ 0.6,
status == "(SC)" ~ 0.4,
TRUE ~ NA_real_
),
score = 1-status_score) %>%
left_join(rgn_data, by = c("state" = "state_abv")) %>%
select(common, rgn_id, state, score) %>%
mutate(source = "emily")Combine this with the data we already have.
For species that are listed differently between Emily’s data and Natureserve, we defer to Emily. The following is listed on Natureserves website and states that their updates are latent. > U.S. & Canada State/Province Status Due to latency between updates made in state, provincial or other NatureServe Network databases and when they appear on NatureServe Explorer, for state or provincial information you may wish to contact the data steward in your jurisdiction to obtain the most current data. Please refer to our Distribution Data Sources to find contact information for your jurisdiction.
update_df <- spp_rgns_scores1 %>%
select(common, rgn_id, state = status_scale, score) %>%
mutate(source = "natureserve") %>%
bind_rows(em_states) %>%
group_by(common, rgn_id, state) %>%
mutate(count = n()) %>%
ungroup() %>%
spread(source, score) %>%
rowwise() %>%
mutate(final_score = case_when(
common == "atlantic salmon" & !is.na(emily) ~ natureserve, #we want to use the stock score
count == 1 ~ max(emily, natureserve, na.rm = T),
count == 2 & !is.na(emily) ~ emily,
count == 2 & is.na(emily) ~ natureserve
),
final_score = ifelse(is.infinite(final_score), NA, final_score)) %>%
select(common, rgn_id, state, score = final_score)How many of our iconic species do we have?
Look at where these species are found. For example, bottlenose dolphin is only listed in MA and NY - is this true?
spp_rgns <- species_rgns %>%
filter(common %in% update_df$common) %>%
left_join(rgn_data, by = "rgn_id") %>%
select(rgn_id, common, state = state_abv) %>%
mutate(score = NA) %>%
filter(!is.na(state))#combine the species regions dataset with the species regions scores. then drop the rows that duplicate regions we already have
spp_rgns_scores <- bind_rows(spp_rgns, update_df) %>%
group_by(common, rgn_id) %>%
mutate(count = n()) %>%
ungroup() %>%
group_by(state, common, rgn_id) %>%
mutate(m = max(score, na.rm = T)) %>%
ungroup() %>%
mutate(drop = case_when(
is.infinite(m) ~ 0,
is.na(score) & count == 2 & !is.infinite(m) ~ 1,
TRUE ~ 0
)) %>%
filter(drop == 0) %>%
distinct() %>%
select(-count, -drop, -m)
#pull out the US national status for each species, we will use these scores for species/regions without a state specific status
us_status <- spp_rgns_scores %>%
filter(state == "USA") %>%
spread(state, score) %>%
select(-rgn_id)For those species in a region where they are found but without a state status, use the USA designated one. Here I also add Great White Shark to each of the regions in the Northeast. We only have scores for USA/IUCN but need to assign it to all the regions.
shark <- data.frame(rgn_id = c(5:11),
common = "great white shark",
state = NA)
df_us <- spp_rgns_scores %>%
bind_rows(shark) %>%
left_join(us_status, by = "common") %>%
rowwise() %>%
mutate(fscore = ifelse(is.na(score), USA, score),
year = 2017) %>%
select(rgn_id, common, score = fscore) %>%
filter(rgn_id < 12,
!is.na(score)) Use USA designated status for region 12, the whole Northeast
We need to add all years even though we dont have any information that tells us status changes. So these scores will all be the same over the entire time period
We only assess Sense of Place for coastal regions so we need to set offshore replace all offshore region scores with NA.
ico_layer <- df_us %>%
bind_rows(ne_status) %>%
mutate(year = 2005) %>%
group_by(common, rgn_id, score) %>%
complete(year = 2005:2017) %>%
ungroup() %>%
left_join(rgn_data) %>%
mutate(score = ifelse(rgn_id < 5, NA, score),
rgn_name = ifelse(rgn_id == 12, "Northeast", rgn_name)) %>%
select(rgn_id, rgn_name, year, common, score)ggplot(ico_layer %>%
filter(!is.na(score)), aes(x = year, y = score, color = common)) +
geom_line() +
theme_bw() +
facet_wrap(~rgn_name) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))Since we see no changes over time let’s just look at 2017 in table form
Need to create a dataframe for background of heat map so that missing values are not blank. Dataframe below has every iconic species in every region
possible_sp <- table_df %>%
select(common) %>%
distinct() %>%
uncount(11, .id = "n", .remove = F) %>%
mutate(as.numeric(n)) %>%
filter(n > 4) #only want coastal regions
regions <- table_df %>%
select(rgn_id, rgn_name) %>%
distinct() %>%
mutate(n = rgn_id)
heatmap_background <- left_join(possible_sp, regions, by = c("n")) %>%
select(common, rgn_id, rgn_name) Create dataframe that matches scores with status
status_num <- read.csv("data/status_numbers.csv") %>%
select(-X)
table_df2 <- table_df %>%
mutate(present = TRUE) %>%
right_join(heatmap_background, by = c("rgn_id", "rgn_name", "common")) %>%
mutate(present = ifelse(is.na(present), FALSE, present)) %>%
group_by(common) %>%
mutate(av_score = mean(score, na.rm=TRUE)) %>%
ungroup() %>%
arrange(desc(av_score), common) %>%
mutate(rgn_name = as.factor(rgn_name)) %>%
transform(common = reorder(common, desc(av_score))) %>%
transform(id = as.numeric(factor(common))) %>%
transform(common = reorder(common, desc(id))) with status label
status_num <- read.csv("data/status_numbers.csv") %>%
select(-X) %>%
mutate(score = 1-icun_score)
order_status <- c("Least Concern", "Least Concern/Not Threatened", "Not Threatened", "Not Threatened/Vulnerable", "Vulnerable", "Vulnerable/Endangered", "Endangered", "Endangered/Critically Endangered", "Critically Endangered", "Critically Endangered/Extinct", "Extinct")
table_status <- table_df %>%
mutate(present = TRUE) %>%
left_join(status_num, by = c("score")) %>%
right_join(heatmap_background, by = c("rgn_id", "rgn_name", "common")) %>%
mutate(present = ifelse(is.na(present), FALSE, present),
status_long = as.character(status_long),
status_long = if_else(common == "Atlantic salmon", "Critically Endangered/Extinct", status_long),
status_long = as.factor(status_long)) %>%
group_by(common) %>%
mutate(av_score = mean(score, na.rm=TRUE)) %>%
ungroup() %>%
arrange(desc(av_score), common) %>%
mutate(rgn_name = as.factor(rgn_name)) %>%
transform(common = reorder(common, desc(av_score))) %>%
transform(id = as.numeric(factor(common))) %>%
transform(common = reorder(common, desc(id))) %>%
rename(Status = status_long) %>%
mutate(Status = factor(Status, levels = order_status))
write.csv(table_status, "data/heatmap_df.csv")Heatmap with scores
ico_heatmap_score <- ggplot(data = table_df2, aes(x = rgn_name, y = common)) +
# geom_tile(fill ='grey90') +
geom_tile(aes(fill = score, alpha = present)) +
scale_fill_viridis(direction = -1, ) +
scale_alpha_discrete(guide = 'none') +
theme_dark()+
theme(axis.text.x = element_text(angle = 45, hjust = 1),
axis.title = element_blank(),
panel.grid = element_blank(),
legend.title = element_blank()) +
coord_cartesian(expand=FALSE)
ggsave("figs/ico_heatmap_score.jpg", width=7, height=6, dpi=300)Heatmap with status level
ico_heatmap <- ggplot(data = table_status, aes(x = rgn_name, y = common)) +
#geom_tile(fill ='grey90') +
geom_tile(aes(fill = Status)) +
scale_fill_viridis_d(direction = 1, ) +
scale_alpha_discrete(guide = 'none') +
theme_dark()+
theme(axis.text.x = element_text(angle = 45, hjust = 1),
axis.title = element_blank(),
panel.grid =element_blank() )+
coord_cartesian(expand=FALSE)
# theme(legend.position='none')
#guides(fill=guide_legend(title=NULL))+
#guides(alpha= FALSE)
ico_heatmap